# import libraries
from bs4 import BeautifulSoup
import requests
import time
import datetime
import smtplib
# Connect to the website and pull the raw title and price text off the product page
URL = 'https://www.amazon.com/Stick-Figure-Friendship-Sarcastic-Funny/dp/B076FTC6FQ/ref=pd_di_sccai_1/130-0859633-0246240?pd_rd_w=H6E0a&pf_rd_p=c9443270-b914-4430-a90b-72e3e7e784e0&pf_rd_r=3VATEZ8E3TYFF3QSXESN&pd_rd_r=7b765e13-638c-445f-a53b-a2518959fe33&pd_rd_wg=L5D2O&pd_rd_i=B076FTC6FQ&psc=1'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

# Without a timeout, requests waits indefinitely on an unresponsive server
page = requests.get(URL, headers=headers, timeout=30)
# Fail on 4xx/5xx responses instead of trying to parse an error page
page.raise_for_status()

soup1 = BeautifulSoup(page.content, "html.parser")
soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

# find() returns None when the id is absent; report that explicitly rather
# than letting it surface as an AttributeError on .get_text()
title_tag = soup2.find(id='productTitle')
price_tag = soup2.find(id='priceblock_ourprice')
if title_tag is None or price_tag is None:
    raise ValueError(
        "Could not find 'productTitle' and/or 'priceblock_ourprice' in the fetched page"
    )

title = title_tag.get_text()
price = price_tag.get_text()
print(title)
print(price)
# Output:
#   I Got Your Back Stick Figure Friendship Novelty Sarcasm Teens Funny T Shirt
#   $16.99
# Tidy the scraped text: trim surrounding whitespace from both fields and
# drop the first character of the price (the "$" in the sample output)
title, price = title.strip(), price.strip()[1:]
print(title, price, sep='\n')
# Output:
#   I Got Your Back Stick Figure Friendship Novelty Sarcasm Teens Funny T Shirt
#   16.99
import datetime

# Capture the current local calendar date used to stamp the scraped record
today = datetime.datetime.now().date()
print(today)
# Output:
#   2021-09-14
import csv

# Column names for the dataset, and the single record scraped above
header = ['Title', 'price', 'Date']
data = [title, price, today]

# Create (or overwrite) the CSV with the header row followed by the record
with open('AmazonWebScraperDataset.csv', mode='w', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerows([header, data])
import pandas as pd

# Load the dataset from the same relative path it was written to. The previous
# user-specific absolute path only resolved on one machine, and only when the
# working directory happened to be that user's home folder.
df = pd.read_csv('AmazonWebScraperDataset.csv')
print(df)
# Output:
#                                                Title  price        Date
# 0  I Got Your Back Stick Figure Friendship Novelt...  16.99  2021-09-14
# Add the record as one more row at the end of the existing CSV
with open('AmazonWebScraperDataset.csv', mode='a+', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerow(data)
def check_price():
    """Scrape the product page once and append the result to the CSV dataset.

    Fetches the hard-coded product URL, reads the text of the elements whose
    ids are ``productTitle`` and ``priceblock_ourprice``, and appends one
    ``[title, price, date]`` row to ``AmazonWebScraperDataset.csv`` in the
    current working directory.

    The file is opened in append mode and the header row is written only when
    the file is missing or empty, so repeated calls build up a price history
    instead of overwriting the previous rows.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the title or price element is not found in the page.
    """
    import csv
    import datetime
    import os

    URL = 'https://www.amazon.com/Stick-Figure-Friendship-Sarcastic-Funny/dp/B076FTC6FQ/ref=pd_di_sccai_1/130-0859633-0246240?pd_rd_w=H6E0a&pf_rd_p=c9443270-b914-4430-a90b-72e3e7e784e0&pf_rd_r=3VATEZ8E3TYFF3QSXESN&pd_rd_r=7b765e13-638c-445f-a53b-a2518959fe33&pd_rd_wg=L5D2O&pd_rd_i=B076FTC6FQ&psc=1'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9",
        "DNT": "1",
        "Connection": "close",
        "Upgrade-Insecure-Requests": "1",
    }

    # Without a timeout a stalled connection would block the polling loop forever
    page = requests.get(URL, headers=headers, timeout=30)
    # Fail on 4xx/5xx responses instead of trying to parse an error page
    page.raise_for_status()

    soup1 = BeautifulSoup(page.content, "html.parser")
    soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

    # find() returns None when the id is absent; report that explicitly rather
    # than letting it surface as an AttributeError on .get_text()
    title_tag = soup2.find(id='productTitle')
    price_tag = soup2.find(id='priceblock_ourprice')
    if title_tag is None or price_tag is None:
        raise ValueError(
            "Could not find 'productTitle' and/or 'priceblock_ourprice' in the fetched page"
        )

    title = title_tag.get_text().strip()
    # Drop the first character of the price text (the "$" in the sample output)
    price = price_tag.get_text().strip()[1:]
    today = datetime.date.today()

    header = ['Title', 'price', 'Date']
    data = [title, price, today]

    csv_path = 'AmazonWebScraperDataset.csv'
    # Only a brand-new or empty file needs the header row
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    with open(csv_path, 'a', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(header)
        writer.writerow(data)
# Runs check_price on a fixed interval so each run records the current price in the CSV.
# Left disabled; uncomment to start polling (time.sleep takes seconds):
# while(True):
#     check_price()
#     time.sleep(5)
import pandas as pd

# Load the dataset from the same relative path the script writes to. The
# previous user-specific absolute path only resolved on one machine, and only
# when the working directory happened to be that user's home folder.
df = pd.read_csv('AmazonWebScraperDataset.csv')
print(df)
# Output:
#                                                Title  price        Date
# 0  I Got Your Back Stick Figure Friendship Novelt...  16.99  2021-09-14
# 1  I Got Your Back Stick Figure Friendship Novelt...  16.99  2021-09-14